#ifndef SIMPLE_LOCK_FREE_QUEUE_H

// Fairly simple, yet correct, implementation of a simple lock-free queue based on linked pointers with CAS
template<typename T>
class SimpleLockFreeQueue {
public:
    // Total maximum capacity: 2**39 (half a terabyte's worth -- off-by-one aligned indices)
    static const int UBER_BLOCKS = 256;
    static const int UBER_BLOCK_SIZE = 256;
    static const int ULTRA_BLOCK_SIZE = 256;
    static const int SUPER_BLOCK_SIZE = 256;
    static const int BLOCK_SIZE = 128;

private:
    static const uint64_t VERSION_MASK = 0xFFFFFF0000000000ULL;
    static const uint64_t VERSION_INCR = 0x0000010000000000ULL;
    static const uint64_t UBER_BLOCK_IDX_MASK = 0xFF00000000ULL;
    static const uint64_t UBER_BLOCK_MASK = 0x00FF000000ULL;
    static const uint64_t ULTRA_BLOCK_MASK = 0x0000FF0000ULL;
    static const uint64_t SUPER_BLOCK_MASK = 0x000000FF00ULL;
    static const uint64_t BLOCK_MASK = 0x00000000FEULL;

    static const uint64_t UBER_BLOCK_IDX_SHIFT = 32;
    static const uint64_t UBER_BLOCK_SHIFT = 24;
    static const uint64_t ULTRA_BLOCK_SHIFT = 16;
    static const uint64_t SUPER_BLOCK_SHIFT = 8;
    static const uint64_t BLOCK_SHIFT = 1;

    typedef std::uint64_t idx_t;

public:
    SimpleLockFreeQueue()
            : nextNodeIdx(2), freeListHead(0) {
        // Invariants: Head and tail are never null
        auto initialNode = allocate_blank_node();
        head.store(set_consumed_flag(initialNode), std::memory_order_relaxed);
        tail.store(initialNode, std::memory_order_relaxed);
        std::atomic_thread_fence(std::memory_order_seq_cst);
    }

    ~SimpleLockFreeQueue() {
        std::atomic_thread_fence(std::memory_order_seq_cst);
        idx_t idx = head.load(std::memory_order_relaxed);
        if (is_consumed(idx)) {
            idx = clear_consumed_flag(idx);
            auto node = get_node_at(idx);
            auto next = node->next.load(std::memory_order_relaxed);
            node->~Node();
            idx = next;
        }
        while (idx != 0) {
            auto node = get_node_at(idx);
            auto next = node->next.load(std::memory_order_relaxed);
            node->item()->~T();
            node->~Node();
            idx = next;
        }

        idx = freeListHead.load(std::memory_order_relaxed);
        while (idx != 0) {
            auto node = get_node_at(idx);
            auto next = node->next.load(std::memory_order_relaxed);
            node->~Node();
            idx = next;
        }
    }

    template<typename U>
    inline bool enqueue(U &&item) {
        idx_t nodeIdx = allocate_node_for(std::forward<U>(item));

        auto tail_ = tail.load(std::memory_order_relaxed);
        while (!tail.compare_exchange_weak(tail_, nodeIdx, std::memory_order_release, std::memory_order_relaxed))
            continue;
        get_node_at(tail_)->next.store(nodeIdx, std::memory_order_release);

        return true;
    }

    inline bool try_dequeue(T &item) {
        while (true) {
            auto rawHead_ = head.load(std::memory_order_acquire);
            auto head_ = clear_consumed_flag(rawHead_);
            auto headNode = get_node_at(head_);
            auto next = headNode->next.load(std::memory_order_relaxed);
            if (next == 0) {
                // Can't move head (that would make head null), but can try to dequeue the node at head anyway
                if (is_consumed(rawHead_)) {
                    return false;
                }

                if (head.compare_exchange_strong(head_, set_consumed_flag(head_), std::memory_order_release,
                                                 std::memory_order_relaxed)) {
                    // Whee, we own the right to dequeue this item
                    item = std::move(*headNode->item());
                    headNode->item()->~T();
                    return true;
                }
            }
            else {
                // Remove node whether it's already been consumed or not; if it hasn't been consumed, consume it!

                // head_->next can't possibly change, since once it's not null nobody writes to it (and ABA is avoided with versioning)
                if (head.compare_exchange_weak(rawHead_, next, std::memory_order_acq_rel, std::memory_order_relaxed)) {
                    // Aha, we successfully moved the head. But does it have anything in it?
                    if (!is_consumed(rawHead_)) {
                        item = std::move(*headNode->item());
                        headNode->item()->~T();
                    }

                    add_node_to_free_list(head_, headNode);

                    if (!is_consumed(rawHead_)) {
                        return true;
                    }
                }
            }
        }
    }

private:
    struct Node {
        std::atomic<idx_t> next;

        alignas(T) char rawItem[sizeof(T)];

        template<typename U>
        Node(U &&item)
                : next(0) {
            new(this->item()) T(std::forward<U>(item));
        }

        Node()
                : next(0) {
        }

        inline T *item() { return reinterpret_cast<T *>(rawItem); }
    };

    struct Block {
        alignas(Node) char nodes[sizeof(Node) * BLOCK_SIZE];

        inline char *node_pos(idx_t idx) { return nodes + ((idx & BLOCK_MASK) >> BLOCK_SHIFT) * sizeof(Node); }
    };

    template<typename TSubBlock, int BlockSize>
    struct HigherOrderBlock {
        std::atomic<TSubBlock *> subblocks[BlockSize];

        HigherOrderBlock() {
            for (int i = 0; i != BlockSize; ++i) {
                subblocks[i].store(nullptr, std::memory_order_release);
            }
        }

        ~HigherOrderBlock() {
            for (int i = 0; i != BlockSize; ++i) {
                if (subblocks[i].load(std::memory_order_relaxed) != nullptr) {
                    delete subblocks[i].load(std::memory_order_relaxed);
                }
            }
        }
    };

    typedef HigherOrderBlock<Block, SUPER_BLOCK_SIZE> SuperBlock;
    typedef HigherOrderBlock<SuperBlock, ULTRA_BLOCK_SIZE> UltraBlock;
    typedef HigherOrderBlock<UltraBlock, UBER_BLOCK_SIZE> UberBlock;
    typedef HigherOrderBlock<UberBlock, UBER_BLOCKS> UberBlockContainer;

private:
    inline idx_t set_consumed_flag(idx_t idx) {
        return idx | (idx_t) 1;
    }

    inline idx_t clear_consumed_flag(idx_t idx) {
        return idx & ~(idx_t) 1;
    }

    inline bool is_consumed(idx_t idx) {
        return (idx & 1) != 0;
    }

    inline void add_node_to_free_list(idx_t idx, Node *node) {
        auto head = freeListHead.load(std::memory_order_relaxed);
        do {
            node->next.store(head, std::memory_order_relaxed);
        } while (!freeListHead.compare_exchange_weak(head, idx, std::memory_order_release, std::memory_order_relaxed));
    }

    inline idx_t try_get_node_from_free_list() {
        auto head = freeListHead.load(std::memory_order_acquire);
        while (head != 0 &&
               !freeListHead.compare_exchange_weak(head, get_node_at(head)->next.load(std::memory_order_relaxed),
                                                   std::memory_order_acquire)) {
            continue;
        }

        if (head != 0) {
            // Increment version
            head = (head & ~VERSION_MASK) | ((head + VERSION_INCR) & VERSION_MASK);
        }
        return head;
    }

    inline Node *get_node_at(idx_t idx) {
        auto uberBlock = uberBlockContainer.subblocks[(idx & UBER_BLOCK_IDX_MASK) >> UBER_BLOCK_IDX_SHIFT].load(
                std::memory_order_relaxed);
        auto ultraBlock = uberBlock->subblocks[(idx & UBER_BLOCK_MASK) >> UBER_BLOCK_SHIFT].load(
                std::memory_order_relaxed);
        auto superBlock = ultraBlock->subblocks[(idx & ULTRA_BLOCK_MASK) >> ULTRA_BLOCK_SHIFT].load(
                std::memory_order_relaxed);
        auto block = superBlock->subblocks[(idx & SUPER_BLOCK_MASK) >> SUPER_BLOCK_SHIFT].load(
                std::memory_order_relaxed);
        return reinterpret_cast<Node *>(block->node_pos(idx));
    }

    template<typename U>
    inline idx_t allocate_node_for(U &&item) {
        auto idx = try_get_node_from_free_list();
        if (idx != 0) {
            auto node = get_node_at(idx);
            node->next.store(0, std::memory_order_relaxed);
            new(node->item()) T(std::forward<U>(item));
            return idx;
        }
        new(new_node_address(idx)) Node(std::forward<U>(item));
        return idx;
    }

    inline idx_t allocate_blank_node() {
        idx_t idx;
        new(new_node_address(idx)) Node();
        return idx;
    }

    inline char *new_node_address(idx_t &idx) {
        idx = nextNodeIdx.fetch_add(static_cast<idx_t>(1) << BLOCK_SHIFT, std::memory_order_relaxed);

        std::size_t uberBlockContainerIdx = (idx & UBER_BLOCK_IDX_MASK) >> UBER_BLOCK_IDX_SHIFT;
        std::size_t uberBlockIdx = (idx & UBER_BLOCK_MASK) >> UBER_BLOCK_SHIFT;
        std::size_t ultraBlockIdx = (idx & ULTRA_BLOCK_MASK) >> ULTRA_BLOCK_SHIFT;
        std::size_t superBlockIdx = (idx & SUPER_BLOCK_MASK) >> SUPER_BLOCK_SHIFT;

        auto uberBlock = lookup_subblock<UberBlockContainer, UberBlock>(&uberBlockContainer, uberBlockContainerIdx);
        auto ultraBlock = lookup_subblock<UberBlock, UltraBlock>(uberBlock, uberBlockIdx);
        auto superBlock = lookup_subblock<UltraBlock, SuperBlock>(ultraBlock, ultraBlockIdx);
        auto block = lookup_subblock<SuperBlock, Block>(superBlock, superBlockIdx);
        return block->node_pos(idx);
    }

    template<typename TBlock, typename TSubBlock>
    inline TSubBlock *lookup_subblock(TBlock *block, std::size_t idx) {
        auto ptr = block->subblocks[idx].load(std::memory_order_acquire);
        if (ptr == nullptr) {
            auto newBlock = new TSubBlock();
            if (!block->subblocks[idx].compare_exchange_strong(ptr, newBlock, std::memory_order_release,
                                                               std::memory_order_acquire)) {
                delete newBlock;
            }
            else {
                ptr = newBlock;
            }
        }
        return ptr;
    }

private:
    std::atomic<idx_t> nextNodeIdx;
    std::atomic<idx_t> head;      // 头
    std::atomic<idx_t> tail;      // 尾部
    std::atomic<idx_t> freeListHead;

    UberBlockContainer uberBlockContainer;
};

#endif